Username password error

I have this code in my demo chatbot

import gradio as gr
from huggingface_hub import InferenceClient

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
# Shared Inference API client for the zephyr-7b-beta chat model.
# NOTE(review): no token is passed here, so requests to the hosted Inference
# API presumably go out unauthenticated — confirm against the 401 below.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for *message*, yielding the growing reply.

    Parameters
    ----------
    message : str
        The latest user message.
    history : list[tuple[str, str]]
        Prior (user, assistant) turn pairs; empty entries are skipped.
    system_message : str
        System prompt placed first in the conversation.
    max_tokens, temperature, top_p
        Sampling parameters forwarded to the Inference API.

    Yields
    ------
    str
        The partial assistant response, extended as each token arrives.
    """
    messages = [{"role": "system", "content": system_message}]

    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    response = ""

    # Loop variable renamed from "message" so the function parameter of the
    # same name is not shadowed mid-stream.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content

        # The final streamed delta can carry content=None; concatenating it
        # would raise "TypeError: can only concatenate str ...", so skip
        # empty/None deltas.
        if token:
            response += token
        yield response


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Build the chat UI. See https://www.gradio.app/docs/chatinterface for the
# full set of ChatInterface customization options.
_system_prompt = gr.Textbox(
    value="You are a friendly Chatbot.", label="System message"
)
_max_tokens = gr.Slider(
    minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"
)
_temperature = gr.Slider(
    minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
)
_top_p = gr.Slider(
    minimum=0.1,
    maximum=1.0,
    value=0.95,
    step=0.05,
    label="Top-p (nucleus sampling)",
)

demo = gr.ChatInterface(
    respond,
    additional_inputs=[_system_prompt, _max_tokens, _temperature, _top_p],
)


if __name__ == "__main__":
    # show_error=True surfaces server-side exceptions to API clients.
    demo.launch(show_error=True)

I am calling this with my offline code:

test.py:
from gradio_client import Client

client = Client("vernalsystems/SYNLIR-Svelhana",
                hf_token='hf_ding_dong')  # placeholder token; authenticates to the Space, not to the Inference API
result = client.predict(  # calls the /chat endpoint of the Space's ChatInterface
                message="""

What is the capital of Canada

                """,
                system_message="You are a friendly Chatbot.",
                max_tokens=1024,
                temperature=0.7,
                top_p=0.95,
                api_name="/chat"
)
print(result)  # prints the final (fully streamed) assistant reply

My space is PUBLIC.
I get this error:

Loaded as API: https://vernalsystems-synlir-svelhana.hf.space ✔
Traceback (most recent call last):
  File "/home/monsoon/test.py", line 5, in <module>
    result = client.predict(
                message="""
    ...<19 lines>...
                api_name="/chat"
    )
  File "/usr/lib/python3.13/site-packages/gradio_client/client.py", line 478, in predict
    ).result()
      ~~~~~~^^
  File "/usr/lib/python3.13/site-packages/gradio_client/client.py", line 1539, in result
    return super().result(timeout=timeout)
           ~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.13/concurrent/futures/_base.py", line 456, in result
    return self.__get_result()
           ~~~~~~~~~~~~~~~~~^^
  File "/usr/lib/python3.13/concurrent/futures/_base.py", line 401, in __get_result
    raise self._exception
  File "/usr/lib/python3.13/concurrent/futures/thread.py", line 59, in run
    result = self.fn(*self.args, **self.kwargs)
  File "/usr/lib/python3.13/site-packages/gradio_client/client.py", line 1158, in _inner
    predictions = _predict(*data)
  File "/usr/lib/python3.13/site-packages/gradio_client/client.py", line 1275, in _predict
    raise AppError(
    ...<2 lines>...
    )
gradio_client.exceptions.AppError: The upstream Gradio app has raised an exception: 401 Client Error: Unauthorized for url: https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta/v1/chat/completions (Request ID: Root=1-683c7c60-74ad22f650f55fe8535d95ce;531fd494-5c44-4726-a7c8-cf6b99f8cf26)

Invalid username or password.

What can I do? Please help. Note: the token and the URL are both correct.

1 Like

The `hf_token` you give `gradio_client` authenticates you to the Gradio server, but the Space does not forward it to the Inference API — its `InferenceClient` was created without a token, so the inference request itself is unauthenticated and fails with 401. Passing the token through as an explicit function argument is reliable, even though it feels redundant:




def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    hf_token,
):
    """Stream a chat completion, authenticating to the Inference API with
    the caller-supplied *hf_token*.

    Parameters
    ----------
    message : str
        The latest user message.
    history : list[tuple[str, str]]
        Prior (user, assistant) turn pairs; empty entries are skipped.
    system_message : str
        System prompt placed first in the conversation.
    max_tokens, temperature, top_p
        Sampling parameters forwarded to the Inference API.
    hf_token : str
        Hugging Face access token used for the inference request.

    Yields
    ------
    str
        The partial assistant response, extended as each token arrives.
    """
    messages = [{"role": "system", "content": system_message}]

    # Build the client per call so the user's token is actually used for
    # inference rather than relying on the Space's (absent) credentials.
    client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token=hf_token)

    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    response = ""

    # Loop variable renamed from "message" so the function parameter of the
    # same name is not shadowed mid-stream.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content

        # The final streamed delta can carry content=None; skip it instead of
        # raising TypeError on string concatenation.
        if token:
            response += token
        yield response